Analysis on Reinfection of Sexually Transmitted Diseases

Yichu Chen, Gu Gong, Kate Jones, Gianni Spiga

2022-11-29

The Data

data(std)
std

Exploratory Data Analysis

Checking Proportionality

Kaplan-Meier Curves

## Call:
## survdiff(formula = surv_object ~ iinfct, data = std)
## 
##                    N Observed Expected (O-E)^2/E (O-E)^2/V
## iinfct=gonorrhea 140       73     54.5   6.28042   7.50617
## iinfct=chlamydia 396      135    153.0   2.12201   3.81136
## iinfct=both      341      139    139.5   0.00166   0.00278
## 
##  Chisq= 8.5  on 2 degrees of freedom, p= 0.01

Hazard Ratios

Cumulative Hazards and CLogLog

## Call:
## coxph(formula = surv_object ~ iinfct, data = std)
## 
##   n= 877, number of events= 347 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)   
## iinfctchlamydia -0.4202    0.6569   0.1457 -2.884  0.00393 **
## iinfctboth      -0.2980    0.7423   0.1450 -2.055  0.03984 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6569      1.522    0.4937    0.8741
## iinfctboth         0.7423      1.347    0.5587    0.9863
## 
## Concordance= 0.524  (se = 0.016 )
## Likelihood ratio test= 7.93  on 2 df,   p=0.02
## Wald test            = 8.37  on 2 df,   p=0.02
## Score (logrank) test = 8.46  on 2 df,   p=0.01

- The cox.zph function was then applied to the model which resulted in a p-value of 0.24 reinforcing that the proportional hazards assumption is maintained. Therefore, we chose to apply the Cox model which is elaborated on the next slide.

Model Selection

The Full model

cox1 <-
  coxph(
    surv_object ~ iinfct + marital + race + os12m + os30d +
      rs12m + rs30d + abdpain + discharge + dysuria + condom + 
      itch + lesion + rash + lymph + vagina + dchexam + abnode +
      age + yschool + npartner,
    data = std
  )
summary(cox1)
## Call:
## coxph(formula = surv_object ~ iinfct + marital + race + os12m + 
##     os30d + rs12m + rs30d + abdpain + discharge + dysuria + condom + 
##     itch + lesion + rash + lymph + vagina + dchexam + abnode + 
##     age + yschool + npartner, data = std)
## 
##   n= 877, number of events= 347 
## 
##                      coef exp(coef)  se(coef)      z Pr(>|z|)   
## iinfctchlamydia -0.331853  0.717593  0.149264 -2.223  0.02620 * 
## iinfctboth      -0.263731  0.768180  0.149262 -1.767  0.07724 . 
## maritalMarried   0.054707  1.056231  0.431282  0.127  0.89906   
## maritalSingle    0.408462  1.504502  0.295237  1.384  0.16651   
## raceWhite       -0.112104  0.893951  0.141248 -0.794  0.42739   
## os12m1          -0.205985  0.813845  0.212025 -0.972  0.33129   
## os30d1          -0.337675  0.713427  0.238454 -1.416  0.15675   
## rs12m1           0.036907  1.037596  0.444944  0.083  0.93389   
## rs30d1          -0.194243  0.823458  0.565166 -0.344  0.73108   
## abdpain1         0.233865  1.263474  0.155288  1.506  0.13207   
## discharge1       0.113143  1.119792  0.114148  0.991  0.32159   
## dysuria1         0.162934  1.176960  0.155112  1.050  0.29352   
## condomnever     -0.263793  0.768133  0.119652 -2.205  0.02748 * 
## itch1           -0.149871  0.860819  0.154492 -0.970  0.33200   
## lesion1         -0.188159  0.828483  0.333106 -0.565  0.57217   
## rash1            0.003932  1.003940  0.392693  0.010  0.99201   
## lymph1          -0.030839  0.969632  0.547483 -0.056  0.95508   
## vagina1          0.349257  1.418013  0.174697  1.999  0.04559 * 
## dchexam1        -0.465508  0.627816  0.229914 -2.025  0.04290 * 
## abnode1          0.193753  1.213797  0.425224  0.456  0.64864   
## age              0.007855  1.007886  0.013891  0.565  0.57176   
## yschool         -0.127454  0.880334  0.039309 -3.242  0.00119 **
## npartner         0.076185  1.079162  0.054067  1.409  0.15881   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.7176     1.3935    0.5356    0.9615
## iinfctboth         0.7682     1.3018    0.5733    1.0292
## maritalMarried     1.0562     0.9468    0.4536    2.4596
## maritalSingle      1.5045     0.6647    0.8435    2.6835
## raceWhite          0.8940     1.1186    0.6778    1.1791
## os12m1             0.8138     1.2287    0.5371    1.2332
## os30d1             0.7134     1.4017    0.4471    1.1385
## rs12m1             1.0376     0.9638    0.4338    2.4818
## rs30d1             0.8235     1.2144    0.2720    2.4929
## abdpain1           1.2635     0.7915    0.9319    1.7130
## discharge1         1.1198     0.8930    0.8953    1.4006
## dysuria1           1.1770     0.8496    0.8684    1.5951
## condomnever        0.7681     1.3019    0.6076    0.9711
## itch1              0.8608     1.1617    0.6359    1.1652
## lesion1            0.8285     1.2070    0.4313    1.5916
## rash1              1.0039     0.9961    0.4650    2.1675
## lymph1             0.9696     1.0313    0.3316    2.8355
## vagina1            1.4180     0.7052    1.0069    1.9970
## dchexam1           0.6278     1.5928    0.4001    0.9852
## abnode1            1.2138     0.8239    0.5275    2.7932
## age                1.0079     0.9922    0.9808    1.0357
## yschool            0.8803     1.1359    0.8151    0.9508
## npartner           1.0792     0.9266    0.9707    1.1998
## 
## Concordance= 0.634  (se = 0.016 )
## Likelihood ratio test= 73.29  on 23 df,   p=4e-07
## Wald test            = 69.84  on 23 df,   p=1e-06
## Score (logrank) test = 71.68  on 23 df,   p=7e-07

Simplifying the Model

cox.model = coxph(surv_object ~ iinfct+condom+vagina
                  +dchexam+yschool, data = std)
summary(cox.model)
## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam + 
##     yschool, data = std)
## 
##   n= 877, number of events= 347 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## iinfctchlamydia -0.38468   0.68067  0.14618 -2.632  0.00850 ** 
## iinfctboth      -0.24681   0.78129  0.14561 -1.695  0.09006 .  
## condomnever     -0.29779   0.74245  0.11551 -2.578  0.00994 ** 
## vagina1          0.40660   1.50171  0.16739  2.429  0.01514 *  
## dchexam1        -0.37042   0.69044  0.22123 -1.674  0.09405 .  
## yschool         -0.14357   0.86626  0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6807     1.4692    0.5111    0.9065
## iinfctboth         0.7813     1.2799    0.5873    1.0393
## condomnever        0.7425     1.3469    0.5920    0.9311
## vagina1            1.5017     0.6659    1.0817    2.0848
## dchexam1           0.6904     1.4483    0.4475    1.0652
## yschool            0.8663     1.1544    0.8115    0.9247
## 
## Concordance= 0.603  (se = 0.017 )
## Likelihood ratio test= 45.32  on 6 df,   p=4e-08
## Wald test            = 46.51  on 6 df,   p=2e-08
## Score (logrank) test = 46.77  on 6 df,   p=2e-08

Residuals

Martingale

Transformation of Quantitative to a Categorical Variable

Schoenfield

##          chisq df    p
## iinfct  3.5184  2 0.17
## condom  1.0539  1 0.30
## vagina  1.2835  1 0.26
## dchexam 0.4085  1 0.52
## yschool 0.0214  1 0.88
## GLOBAL  6.1176  6 0.41
# years of school  
figdfb1 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 6],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 4], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Years of School (Categorical)", title = "dfbeta Values for Years of School"),
  tooltip = "text"
)

# Discharge at Exam
figdfb2 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 5],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 5], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Discharge at Exam", title = "dfbeta Values for Discharge at Exam"),
  tooltip = "text"
)

# Vaginal Involvement
figdfb3 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 4],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 6], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Vaginal Involvement at Exam", title = "dfbeta Values for Vaginal Involvement at Exam"),
  tooltip = "text"
)


# Condom
figdfb4 <- ggplotly(
  ggplot() + aes(
    x = std$obs,
    y = b.dfb[, 3],
    text = paste("Obs:", std$obs, "\ndfBeta:", round(b.dfb[, 6], 4))
  ) + geom_point() + labs(x = "Observation Number", y = "Condom", title = "dfbeta Values for Condom Usage"),
  tooltip = "text"
)


fig <- subplot(
  figdfb1,
  figdfb2,
  figdfb3,
  figdfb4,
  nrows = 2,
  shareX = TRUE,
  shareY = TRUE
) %>% layout(title = "dfBeta values for Years of Schooling, Discharge at Exam, Vaginal Involvement, \nand Condom Usage")

# Update title
annotations = list( 
  list( 
    x = 0.2,  
    y = 1.0,  
    text = "Years of Schooling",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ),  
  list( 
    x = 0.8,  
    y = 1,  
    text = "Discharge at Exam",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ),  
  list( 
    x = 0.2,  
    y = 0.475,  
    text = "Vaginal Involvement",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ),
  list( 
    x = 0.8,  
    y = 0.475,  
    text = "Condom Usage",  
    xref = "paper",  
    yref = "paper",  
    xanchor = "center",  
    yanchor = "bottom",  
    showarrow = FALSE 
  ))

fig <- fig %>%layout(annotations = annotations) 
fig

dfBeta

Cox-Snell

# Outliers

In Conclusion

Our Best Model

## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam + 
##     yschool, data = std)
## 
##   n= 877, number of events= 347 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## iinfctchlamydia -0.38468   0.68067  0.14618 -2.632  0.00850 ** 
## iinfctboth      -0.24681   0.78129  0.14561 -1.695  0.09006 .  
## condomnever     -0.29779   0.74245  0.11551 -2.578  0.00994 ** 
## vagina1          0.40660   1.50171  0.16739  2.429  0.01514 *  
## dchexam1        -0.37042   0.69044  0.22123 -1.674  0.09405 .  
## yschool         -0.14357   0.86626  0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6807     1.4692    0.5111    0.9065
## iinfctboth         0.7813     1.2799    0.5873    1.0393
## condomnever        0.7425     1.3469    0.5920    0.9311
## vagina1            1.5017     0.6659    1.0817    2.0848
## dchexam1           0.6904     1.4483    0.4475    1.0652
## yschool            0.8663     1.1544    0.8115    0.9247
## 
## Concordance= 0.603  (se = 0.017 )
## Likelihood ratio test= 45.32  on 6 df,   p=4e-08
## Wald test            = 46.51  on 6 df,   p=2e-08
## Score (logrank) test = 46.77  on 6 df,   p=2e-08